home *** CD-ROM | disk | FTP | other *** search
/ Amiga Format CD 42 / Amiga Format AFCD42 (Issue 126, Aug 1999).iso / -serious- / programming / arexx / import_html / import_html.rexx < prev    next >
OS/2 REXX Batch file  |  1999-05-25  |  11KB  |  253 lines

  1. /*      Import_HTML
  2. $VER: Import_HTML 3.0 by MJ, Adrian Barnett, Heiko Kuschel
  3.                 Imports an HTML (Web) file into Wordworth.
  4.                 All HTML codes are stripped out, leaving
  5.                 the text. Also, codes for different headings,
  6.                 and bold, italic and underline styles are
  7.                 recognised.
  8.  
  9.                 Digita ARexx Script for Wordworth 5
  10.                 Copyright ©1996, Digita International Ltd.
  11.                 
  12.                 Created: 2 July 1996
  13.                 Author:  MJ
  14.  
  15.                 Version 2.0
  16.                 Modified: 7 May 1997
  17.                 Author: Adrian Barnett (adrian@abarnett.demon.co.uk)
  18.                 Changes: Handle long lines of text
  19.                         Handle lists
  20.                         Deal with newlines correctly
  21.                         Center text correctly
  22.                         Deal with lower-case html tags
  23.  
  24.                         This still needs a lot of work...
  25.                         ... ok, so I did some of it...
  26.  
  27.                 Version 3.0
  28.                 Modified: 11 Nov 1998
  29.                 Uploaded: June 1999 (sorry for the big delay)
  30.                 Author: Heiko Kuschel <hkuschel@mayn.de>
  31.                 Changes: Handle German Umlauts
  32.                          Major speed improvement
  33.                          Font names and sizes configurable
  34.                          to do:
  35.  
  36.                          list font
  37.                          rework the title handling
  38.                          tables using TurboCalc
  39.                          tables using Tabs (configurable)
  40.                          More speed improvements possible!
  41.                          IIRC German Umlauts are always printed UPPERCASE in this version.
  42.                          I had already changed this, but lost the code.
  43. */
  44.  
  45.  
  46. /* --------------   Configure here   --------------- */
  47.  
  48. StandardFont = "Garamond Antiqua" /* Font name set on the new document together with size H0, before any text is inserted. */
  49. H1=25 /* Font sizes in pt. H1 to H6 are applied when the heading tag of the same level is read. */
  50. H2=20
  51. H3=18
  52. H4=15
  53. H5=14
  54. H6=13
  55. H0=12 /* This is the standard font size. NOTE(review): the closing-heading handler resets the size with a literal 12, not H0 - keep the two in sync when changing this value. */
  56.  
  57. /* ----- Nothing to configure after this line. ----- */
  58.  
  59.  
  60.  
  61. OPTIONS RESULTS /* Request result strings from host commands; the script reads Result after RequestFile and after New. */
  62.  
  63. numch = 0 /* Initialised here and again once the input file has been opened. */
  64.  
  65. RequestFile TITLE "Select HTML file..." PATTERN "(#?.HTML|#?.HTM)" /* Host command; the pattern limits the choice to names ending in .HTML or .HTM. */
  66. FileName = Result /* Value returned by RequestFile; later passed to Open() as the file to read. */
  67.  
  68. If RC > 0 THEN
  69.         Exit /* RC is still the return code of RequestFile (the assignment above does not change it); presumably non-zero means cancel or error - stop before creating a document. */
  70.  
  71.  
  72. If Open('MyFile', FileName, R) THEN DO
  73.  
  74.         New
  75.         Address Value Result
  76.  
  77.         Document A4 "0.5in" "0.5in" "0.6in" "1.0in"
  78.         Zoom 100
  79.         Paragraph 0 0 0 LEFT AUTO SINGLE NONE NONE
  80.  
  81.         Font NAME StandardFont SIZE H0 PLAIN
  82.  
  83.         Para = ''
  84.         numch = 0
  85.  
  86.         DO Until EOF('MyFile')
  87.                 MyChar = ReadCh('MyFile')
  88.  
  89.                 /* Handle "<>" tags */
  90.                 select
  91.                     when MyChar = '<' THEN DO
  92.                             Text Para
  93.                             Para = ''
  94.                             Code = MyChar
  95.                             DO UNTIL MyChar = '>'
  96.                                     MyChar = ReadCh('MyFile')
  97.                                     Code = Code || MyChar
  98.                             END
  99.  
  100.                             /* Convert tag to upper case */
  101.                             Code = UPPER(Code)
  102.                             SubCode = SubStr(Code, 1, 3)
  103.                             SubCode2 = SubStr(Code, 1, 4)
  104.                             SubCode3 = SubStr(Code, 1, 2)
  105.                             select
  106.                                when Code = '<P>' THEN do
  107.                                        NewParagraph
  108.                                        NewParagraph
  109.                                end
  110.                                when Code = '<BR>' THEN
  111.                                        NewParagraph
  112.                                when Code = '<B>' THEN
  113.                                        Bold
  114.                                when Code = '<I>' | Code = '<ADDRESS>' THEN
  115.                                        Italic
  116.                                when Code = '<U>' THEN
  117.                                        Underline
  118.                                when Code = '</B>' | Code = '</I>' | Code = '</U>' | Code = '</ADDRESS>' THEN
  119.                                        Plain
  120.                                when SubCode3 = '<H' then DO
  121.                                    Newparagraph
  122.                                    select
  123.                                        when Code = '<H1>' THEN
  124.                                                Font SIZE H1
  125.                                        when Code = '<H2>' THEN
  126.                                                Font SIZE H2
  127.                                        when Code = '<H3>' THEN
  128.                                                Font SIZE H3
  129.                                        when Code = '<H4>' THEN
  130.                                                Font SIZE H4
  131.                                        when Code = '<H5>' THEN
  132.                                                Font SIZE H5
  133.                                        when Code = '<H6>' THEN
  134.                                                Font SIZE H6
  135.                                        when Code = '<HR>' THEN do
  136.                                                text "___________________________________________________________"
  137.                                                NewParagraph
  138.                                        end
  139.                                        otherwise NOP
  140.                                    end
  141.                                end
  142.                                when Code = '<LI>' THEN DO
  143.                                        NewParagraph
  144.                                        text "o "
  145.                                END
  146.                                when Code = '</UL>' | Code = '</OL>' | Code = '</DIR>' | Code = '</MENU>' THEN
  147.                                        NewParagraph 
  148.  
  149.                                when Subcode = '</H' THEN DO
  150.                                        NewParagraph
  151.                                        Font SIZE 12
  152.                                END
  153.                                when SubCode2 = '<IMG' THEN DO
  154.                                        text " [image] "
  155.                                END
  156.  
  157.                                when Code = '<CENTER>' THEN
  158.                                        CentreJustify
  159.                                when Code = '</CENTER>' THEN do
  160.                                        NewParagraph
  161.                                        LeftJustify
  162.                                end
  163.                                when Code = '<TITLE>' THEN DO
  164.                                        Title = ''
  165.                                        DO UNTIL MyChar = '<'
  166.                                                MyChar = ReadCh('MyFile')
  167.                                                IF MyChar = '&' THEN DO
  168.  
  169.                                                   /* Handle things like "&quot;" */
  170.                                                   Code = MyChar
  171.                                                   DO UNTIL MyChar = ';'
  172.                                                           MyChar = ReadCh('MyFile')
  173.                                                           Code = Code || MyChar
  174.                                                   END
  175.  
  176.                                                   /* Convert tag to upper case */
  177.                                                   Code = UPPER(Code)
  178.  
  179.                                                   IF Code = '"' THEN TITLE = TITLE || """"
  180.                                                   else IF Code = '>' THEN TITLE = TITLE || ">"
  181.                                                   else IF Code = '<' THEN TITLE = TITLE || "<"
  182.                                                   else IF Code = '&' THEN TITLE = TITLE || "&"
  183.                                                   else IF Code = '&POUND;' THEN TITLE = TITLE || "£"
  184.                                                   else IF Code = '&AUML;' THEN TITLE = TITLE || "ä"
  185.                                                   else IF Code = '&OUML;' THEN TITLE = TITLE || "ö"
  186.                                                   else IF Code = '&UUML;' THEN TITLE = TITLE || "ü"
  187.                                                   else IF Code = '&S